*************************************************************************************
************** Project: Robots and Household Financial Behavior *********************
***************************  Gomes, Jansson and Karabulut ***************************
*************************** VARIABLE DEFINITIONS ***********************************
*************************************************************************************
****************************** LAST MODIFIED ****************************************
******************************** 2023-06-07 *****************************************
** Stata session settings **
* Start from an empty session: no data, programs or macros left over from earlier runs
capture clear all
macro drop _all
clear
* Resource limits (set memory is accepted but ignored by Stata 12 and later)
set memory 1000m
set matsize 11000
set maxvar 10000
* Do not pause output at the more prompt
set more off
** Paths: replace each placeholder with the actual directory before running **
* dataprev is defined here but not referenced in the visible part of this file
global dataprev "DEFINE THE PATH HERE"
* data: folder holding the raw yearly csv files
global data "DEFINE THE PATH HERE"
* projf: project root; log files and the output samples are written beneath it
global projf "DEFINE THE PATH HERE"
**Construct the working sample for each year separately**
* One pass per year from 1999 through 2007; the loop body ends after the save step at the bottom of the file
forvalues i=1999(1)2007 {
*** Load the dataset ***
**Open a log file for this year, closing any log that is still open**
cap log close
log using "$projf/logs/RFS_revision/1_Var_Defs`i'_230425.log", replace
**Read the raw data for this year**
*Comma-separated file; any data currently in memory is replaced*
insheet using "$data/linda`i'230423R.csv", comma clear 
*************************************************************
*** Variable definitions: dependent and control variables ***
*************************************************************
*** Restrict to the regular LINDA sample (not the immigrant sample) ***
* Cross-tabulate the two sampling flags before filtering
tabulate sampled_f sampled_i
* Retain observations whose sampled_f flag equals 1 or 2
keep if inlist(sampled_f, 1, 2)
* Report the number of observations that remain
count
*** Dependent variables: asset market participation ***
* Each indicator is 1 for a strictly positive holding, 0 for a holding of exactly zero,
* and missing otherwise (negative or missing source value).
* Stata orders missing values above every number, so an unguarded x>0 test is true
* for a missing x. The !missing() guard keeps such observations from being coded as
* participants.
**Being banked**
gen dbank=1 if hhbank>0 & !missing(hhbank)
replace dbank=0 if hhbank==0
la var dbank "Having a bank account"
**Risky asset market participation (defined as holding stocks or equity mutual funds)**
gen drisky=1 if hhstocksmutual>0 & !missing(hhstocksmutual)
replace drisky=0 if hhstocksmutual==0
la var drisky "Risky asset holdings"
**Having debt outstanding**
gen debtow=1 if hhdebt>0 & !missing(hhdebt)
replace debtow=0 if hhdebt==0
la var debtow "Debt ownership"
**Homeownership**
gen homeow=1 if hhhouse>0 & !missing(hhhouse)
replace homeow=0 if hhhouse==0
la var homeow "Homeownership"
**Ownership of real assets (hhrealassets)**
gen realeow=1 if hhrealassets>0 & !missing(hhrealassets)
replace realeow=0 if hhrealassets==0
la var realeow "REownership"
*** Household control variables: demographics and financials ***
**Scale parameter of the inverse hyperbolic sine (IHS) transform**
scalar theta=1
* IHS(x) = asinh(theta*x)/theta. The built-in asinh() is used instead of the
* hand-written log(x+sqrt(x^2+1)): for large negative x the two terms of that sum
* nearly cancel, so precision is lost and the result becomes log(0), that is missing,
* once x is around -1e8 or below.
* scalar(theta) guarantees the scalar is used even if a variable name abbreviates to theta.
**IHS of disposable income**
gen linc=asinh(scalar(theta)*inchh)/scalar(theta)
label var linc "Income"
**Permanent income proxy**
* Row mean over the variables stored from inchh_disp1995 through inchh_disp1998
* (a range in dataset order) together with inchh
egen inchh_perm = rowmean(inchh_disp1995-inchh_disp1998 inchh)
gen linc_perm = asinh(scalar(theta)*inchh_perm)/scalar(theta)
label var linc_perm "Perm Income"
sum linc linc_perm, d
**IHS of real estate wealth**
gen lre=asinh(scalar(theta)*hhrealassets)/scalar(theta)
label var lre "Real estate wealth"
**IHS of household net wealth**
gen lnw=asinh(scalar(theta)*hhnetw)/scalar(theta)
label var lnw "Net wealth in logs"
**Share of risky assets in financial wealth**
* Ratio of hhrisky to hhfinw, set to 0 whenever hhfinw is exactly zero
generate risky_share = cond(hhfinw==0, 0, hhrisky/hhfinw)
summarize risky_share hhrisky hhfinw, detail
**Share of risky assets in financial plus real estate wealth**
* The numerator adds half of hhrealassets to hhrisky; the share is 0 when the
* denominator is exactly zero
generate risky_share_re = cond((hhfinw+hhrealassets)==0, 0, (hhrisky+0.5*hhrealassets)/(hhfinw+hhrealassets))
summarize risky_share_re, detail
**Age dummies**
* Each dummy is 1 inside its age band and 0 outside it. Observations with a missing
* age are left missing on all four dummies: Stata treats missing as larger than any
* number, so an unguarded age>=60 test would place them in the oldest band.
gen dage1=(age<30) if !missing(age)
label var dage1 "Age 18-30"
**
gen dage2=(age>=30 & age<45) if !missing(age)
label var dage2 "Age 30-45"
**
gen dage3=(age>=45 & age<60) if !missing(age)
label var dage3 "Age 45-60"
**
gen dage4=(age>=60) if !missing(age)
label var dage4 "Age 60-75"
**Age at immigration (currently disabled)**
*gen years_in_swe=(`i'-immi_year)
*gen age_immi=(age-years_in_swe)
**Gender dummy**
* One indicator per category of sex, named gender1, gender2, in ascending order of the sex codes
tabulate sex, generate(gender)
* Only the indicator of the first category is kept
drop gender2
* NOTE(review): male equals the indicator of the lowest sex code - confirm that this code denotes men
generate male = gender1
label variable male "Male"
**Profession dummies**
**Self-employment (defined at the household level)**
* 1 for strictly positive household enterprise income, 0 for exactly zero, missing otherwise.
* The !missing() guard is required because Stata evaluates missing>0 as true, which
* would code households with missing enterprise income as self-employed.
gen selfemployed=1 if enterpr_inc_hh>0 & !missing(enterpr_inc_hh)
replace selfemployed=0 if enterpr_inc_hh==0
la var selfemployed "Self-Employed"
*Student: variable already present in the raw data, only labelled here*
la var student "Student"
*Employee: copies the values 1 and 0 of employed; any other value stays missing*
gen employee=1 if (employed==1)
replace employee=0 if (employed==0)
la var employee "Employee"
*Unemployed: labelled only*
la var unemployed "Unemployed"
*Retired: labelled only*
la var retired "Retired"
**Marital status: labelled only**
la var married "Married"
**Education level**
* Three indicators built from edulev with the cut-offs 4 and above, exactly 3, and 2 and below.
* Observations with a missing edulev stay missing on all three instead of being counted
* as college educated (Stata evaluates missing>=4 as true).
gen college = (edulev >= 4) if !missing(edulev)
la var college "College and more Dummy"
gen hschool = (edulev == 3) if !missing(edulev)
la var hschool "High school Dummy"
gen nohschool = (edulev <= 2) if !missing(edulev)
la var nohschool "Less than high school Dummy"
**Number of adults**
generate nr_adults = hh_adults
label variable nr_adults "Nbr of adults"
**Number of children**
generate nr_children = hh_children
label variable nr_children "Nbr of children"
**Swedish spouse**
* 1 when the spouse birth country code is SE, 0 for every other value
* NOTE(review): an empty code is also coded 0 - confirm this is intended
generate swespouse = (sp_birthcountry_code=="SE")
label variable swespouse "Swedish spouse"
**Regional immigrant ratio: labelled only**
label variable reg_immi_rat "Regional immigrant ratio"
**Dummy for working in the financial sector: labelled only**
label variable finsector "Financial sector worker"
**Dummy for working in the central or local government sector: labelled only**
label variable govsector "Government sector worker"
**Dummy for being an immigrant**
* 1 for every birth country code other than SE, 0 for SE
* NOTE(review): an empty code is coded 1 - confirm that the raw data has no blank codes
generate immigrant = (birthcountry_code!="SE")
label variable immigrant "Immigrant"
**Net wealth quartile dummies for different asset classes**
* Quartiles are taken on the IHS transform asinh(theta*x)/theta, where theta is the
* scalar set earlier in the loop body. The built-in asinh() stays accurate for large
* negative net wealth, where log(x+sqrt(x^2+1)) loses precision or returns missing
* and would distort or drop observations in the bottom quartile.
*Quartiles of total net wealth*
gen lnetw_sav=asinh(scalar(theta)*hhnetw)/scalar(theta)
xtile pctile_sav = lnetw_sav,nq(4)
drop lnetw_sav
*One dummy per quartile: hhnetw_sav1 through hhnetw_sav4*
tab pctile_sav, gen(hhnetw_sav)
drop pctile_sav
*Quartiles of net wealth excluding both direct and indirect stocks (hhstocksmutual)*
gen hhnetw_st=(hhnetw-hhstocksmutual)
gen lnetw_st=asinh(scalar(theta)*hhnetw_st)/scalar(theta)
xtile pctile_st = lnetw_st,nq(4)
drop hhnetw_st lnetw_st
*One dummy per quartile: hhnetw_stocks1 through hhnetw_stocks4*
tab pctile_st, gen(hhnetw_stocks)
drop pctile_st
**Regroup the SNI codes to match the robot data**
*String copy of the SNI code; codes not regrouped below keep their own number as text
tostring sni_2, generate(SNI2_s)
*Agriculture, fishery
replace SNI2_s="01-05" if inlist(sni_2,1,2,3,4,5)
*Food and beverages; tobacco
replace SNI2_s="15-16" if inlist(sni_2,15,16)
*Textiles
replace SNI2_s="17-18-19" if inlist(sni_2,17,18,19)
*Wood and furniture; paper
replace SNI2_s="20-21-22" if inlist(sni_2,20,21,22)
*Pharmaceuticals, other chemical products
replace SNI2_s="23-24" if inlist(sni_2,23,24)
*Rubber and plastics; other chemical products; glass, ceramics, etc.
replace SNI2_s="25-26" if inlist(sni_2,25,26)
*Basic metals, metal products
replace SNI2_s="27-28" if inlist(sni_2,27,28)
*Industrial machinery
replace SNI2_s="29" if sni_2==29
*Electrical and electronics
replace SNI2_s="30-31-32-33" if inlist(sni_2,30,31,32,33)
*Automotive
replace SNI2_s="34-35" if inlist(sni_2,34,35)
*Mining
replace SNI2_s="99998" if inlist(sni_2,10,11,12,13,14)
*Utilities
replace SNI2_s="99997" if inlist(sni_2,40,41)
*Construction
replace SNI2_s="99996" if sni_2==45
*Education and research
replace SNI2_s="99995" if sni_2==80
*Check the mapping from original codes to groups
tabulate sni_2 SNI2_s
**Generate a year variable if it does not already exist**
cap gen year=`i'
**Define cohorts by birth year - 5 years within a cohort**
*Birth year implied by the sample year and age*
gen birthyear=(`i'-age)
**Cohort variable**
*Twelve consecutive 5-year windows: cohort 1 is 1924-1928, cohort 2 is 1929-1933, and so
*on up to cohort 12, which is 1979-1983. The upper bound of cohort 12 is 1983 (not 1982)
*so that it has the same 5-year width as the other cohorts and 1983 births get a cohort.
*Birth years outside 1924-1983, and missing birth years, leave cohort missing.
gen cohort=.
forvalues c=1(1)12 {
	local lo=1923+5*(`c'-1)
	local hi=`lo'+6
	replace cohort=`c' if birthyear>`lo' & birthyear<`hi'
	}
*Tabulate the distribution of the cohort variable*
tab cohort
*Rank variables within birth cohort, computed on the current year cross-section*
*The four parallel lists below describe the seven rank variables, in creation order:
*  rk_stub  name stub; the final variable is called stub_rank
*  rk_src   variable that is ranked
*  rk_nq    number of quantiles (100 for percentile ranks, 10 for decile ranks)
*  rk_sum   1 if detailed summary statistics of the source are printed first
local rk_stub pct_nw dec_nw pct_fw dec_fw pct_inc pct_earn dec_earn
local rk_src hhnetw hhnetw hhfinw hhfinw linc inchh_earned inchh_earned
local rk_nq 100 10 100 10 100 100 10
local rk_sum 1 1 1 1 0 0 0
forvalues j=1/7 {
	local stub : word `j' of `rk_stub'
	local src : word `j' of `rk_src'
	local nq : word `j' of `rk_nq'
	local showsum : word `j' of `rk_sum'
	if `showsum' {
		summarize `src', detail
	}
	*Quantile of the source variable within each of the 12 cohorts, one helper variable per cohort
	forvalues c=1/12 {
		xtile `stub'_`c'=`src' if cohort==`c', nq(`nq')
	}
	*Each helper is non-missing only inside its own cohort, so the row mean returns the own-cohort rank
	egen `stub'_rank=rowmean(`stub'_1-`stub'_12)
	*Drop the helper variables
	drop `stub'_1-`stub'_12
}
**Save the working sample for this year in Stata format**
*One dta file per year, overwritten if it already exists*
save "$projf/data/linda_sample230423_`i'.dta", replace
*Report the number of observations in the saved sample*
count
**Close the log file for this year**
cap log close
*End of the year loop*
 }
